This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Introduction

Objectives

Install R libraries

# Attach each required package, installing it first when it is missing.
# require() returns FALSE (without attaching anything) when a package is
# absent, so an explicit library() call must follow install.packages();
# otherwise a freshly installed package is never attached in this session and
# later calls such as read_csv() fail.
if (!require(readr)) {
  install.packages("readr")
  library(readr)
}
## Loading required package: readr
## Warning: package 'readr' was built under R version 4.1.2
if (!require(dplyr)) {
  install.packages("dplyr")
  library(dplyr)
}
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if (!require(DT)) {
  install.packages("DT")
  library(DT)
}
## Loading required package: DT
## Warning: package 'DT' was built under R version 4.1.2
if (!require(ggrepel)) {
  install.packages("ggrepel")
  library(ggrepel)
}
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 4.1.2
## Loading required package: ggplot2
if (!require(leaflet)) {
  install.packages("leaflet")
  library(leaflet)
}
## Loading required package: leaflet
## Warning: package 'leaflet' was built under R version 4.1.2

The Crime Data

About the Data

  • IncidntNum (T) Incident number
  • Category (T) Crime category, i.e., larceny/theft
  • Descript (T) Description of the crime
  • DayOfWeek (T) Day of the week
  • Date (D) Date: MM/DD/YYYY
  • Time (T) Time: 24-hour system
  • PdDistrict (T) Police district where incident occurred
  • Resolution (T) Resolution of the crime
  • Address (T) Address of the crime
  • X (N) Longitude
  • Y (N) Latitude
  • Location (T) Lat/long
  • PdId (N) Police Department ID

Read the data

Load the data using readr and read_csv().

# Location of the crime CSV. The commented-out line is an alternative remote
# (zipped) source; the active value is an absolute path on a local Windows
# drive, so it must be changed before running this notebook anywhere else.
# path <- "https://github.com/stricje1/VIT_University/blob/master/Crime_Analysis_Mapping/data/Chennai_crime_4yr.zip"
path <- "C:\\Users\\jeff\\Documents\\Books\\Crime Analysis\\India_data\\chennai_crimes.csv"
# Read the whole file into `df` with readr; no column types are passed to
# read_csv() here, so readr chooses them and reports them in the message below.
df <- read_csv(path)
## Rows: 499365 Columns: 13
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (8): Category, Descript, DayOfWeek, Date, PdDistrict, Resolution, Addre...
## dbl  (4): IncidntNum, X, Y, PdId
## time (1): Time
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

Display Data

Display the data using DT and datatable().

library(DT)

# Show the first 100 rows in an interactive table, five rows per page.
df_sub <- df[seq_len(100), ]
# Store Time as text in the preview copy only; `df` itself is left untouched.
df_sub[["Time"]] <- as.character(df_sub[["Time"]])
datatable(
  df_sub,
  options = list(pageLength = 5, scrollX = "400px")
)

# Report the size of the full data set with a thousands separator.
sprintf(
  "Number of Rows in Dataframe: %s",
  format(nrow(df), big.mark = ",")
)
## [1] "Number of Rows in Dataframe: 499,365"

Preprocess Data

The All-Caps text is difficult to read. Let’s force the text in the appropriate columns into proper case.

# Convert runs of capital letters to proper case.
#
# Every stretch of two or more consecutive capitals that starts at a word
# boundary keeps its first letter upper-case and has the rest lower-cased
# (e.g. "LARCENY/THEFT" becomes "Larceny/Theft"). Single capitals and text
# that is already mixed case are left as they are.
#
# x: character vector to convert.
# Returns a character vector of the same length as `x`.
proper_case <- function(x) {
  caps_run <- "\\b([A-Z])([A-Z]+)"
  # \U and \L are Perl-style case switches, hence perl = TRUE.
  gsub(caps_run, "\\U\\1\\L\\2", x, perl = TRUE)
}

library(dplyr)

# Proper-case the four text columns in one pass and store Time as text.
df <- df %>%
  mutate(
    across(c(Category, Descript, PdDistrict, Resolution), proper_case),
    Time = as.character(Time)
  )

# Show the first 100 cleaned rows, five per page.
df_sub <- df[seq_len(100), ]
datatable(
  df_sub,
  options = list(pageLength = 5, scrollX = "400px")
)

Visualize Data

Crime across space

library(leaflet)

# Map the first 100,000 rows of the data set.
data <- df[seq_len(100000), ]

# Build one HTML snippet per incident; it is shown when a marker is clicked.
data$popup <- paste("<b>Incident #: </b>", data$IncidntNum, "<br>", "<b>Category: </b>", data$Category,
                    "<br>", "<b>Description: </b>", data$Descript,
                    "<br>", "<b>Day of week: </b>", data$DayOfWeek,
                    "<br>", "<b>Date: </b>", data$Date,
                    "<br>", "<b>Time: </b>", data$Time,
                    "<br>", "<b>PD district: </b>", data$PdDistrict,
                    "<br>", "<b>Resolution: </b>", data$Resolution,
                    "<br>", "<b>Address: </b>", data$Address,
                    "<br>", "<b>Longitude: </b>", data$X,
                    "<br>", "<b>Latitude: </b>", data$Y)

# Three switchable base layers plus clustered incident markers. Each tile
# layer is added exactly once and belongs to a named group, so the layers
# control can swap between them; an extra ungrouped addTiles() would stay on
# the map permanently underneath whichever base layer is selected.
leaflet(data, width = "100%") %>%
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(provider = "Esri.WorldStreetMap", group = "World StreetMap") %>%
  addProviderTiles(provider = "Esri.WorldImagery", group = "World Imagery") %>%
  # addProviderTiles(provider = "NASAGIBS.ViirsEarthAtNight2012",group = "Nighttime Imagery") %>%
  addMarkers(
    lng = ~X,
    lat = ~Y,
    popup = ~popup,
    clusterOptions = markerClusterOptions()
  ) %>%
  addLayersControl(
    baseGroups = c("OSM (default)", "World StreetMap", "World Imagery"),
    options = layersControlOptions(collapsed = FALSE)
  )

In this manner, we can click icons on the map to show incident details. To build these details, we concatenate, or “paste”, several fields together into a single HTML string per incident. For example, data$popup[1] holds the content for the first incident, as shown below:

You may notice the “%>%” or forward-pipe operator in the leaflet arguments. The operators pipe their left-hand side values forward into expressions that appear on the right-hand side, rather than from the inside and out.

# Print the popup HTML string built for the first row of `data`.
data$popup[1]
## [1] "<b>Incident #: </b> 150098210 <br> <b>Category: </b> Robbery <br> <b>Description: </b> Robbery, Bodily Force <br> <b>Day of week: </b> Sunday <br> <b>Date: </b> 2/1/2015 <br> <b>Time: </b> 15:45:00 <br> <b>PD district: </b> Zone4 <br> <b>Resolution: </b> None <br> <b>Address: </b> None <br> <b>Longitude: </b> 80.26669397 <br> <b>Latitude: </b> 13.09199072"

Crime Over Time

That was not meant to rhyme, but I like it. In this section, we will manipulate the data using the dplyr::mutate function. mutate adds new variables while preserving existing variables. Below, the plot draws the daily counts as a thin gold line (#F2CA27), with a dark line (#1A1A1A) that smooths the data.

library(dplyr)

# Tally incidents per calendar day, oldest day first. The Date column is text
# and is parsed as month/day/year.
df_crime_daily <- df %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
  count(Date, name = "count") %>%
  arrange(Date)

Daily Crimes Plot with Variance

library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
# Daily incident counts as a thin line with geom_smooth()'s default smoother
# drawn over it; the x axis gets one break per year, labelled with the year.
plot <- ggplot(data = df_crime_daily, mapping = aes(x = Date, y = count)) +
  geom_line(colour = "#F2CA27", size = 0.1) +
  geom_smooth(colour = "#1A1A1A") +
  # fte_theme() +
  scale_x_date(
    breaks = date_breaks("1 year"),
    labels = date_format("%Y")
  ) +
  xlab("Date of Crime") +
  ylab("Number of Crimes") +
  ggtitle("Daily Crimes in Chennai from 2009 - 2018")
plot
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

## Aggregate Data

# Count incidents per category (most frequent first), keep only categories
# with more than 1,000 incidents, and add each category's share of the
# retained total. Percentage is a 0-1 fraction; it is not multiplied by 100.
df_category <- sort(table(df[["Category"]]), decreasing = TRUE)
df_category <- data.frame(df_category[df_category > 1000])
names(df_category) <- c("Category", "Frequency")
df_category[["Percentage"]] <- with(df_category, Frequency / sum(Frequency))
datatable(
  df_category,
  options = list(scrollX = "400px")
)

Create a Bar Chart

library(ggplot2)
library(ggrepel)

# One bar per category, with bar height taken directly from Frequency. The
# x-axis text is hidden and each bar is labelled with a repelled text label.
bp <- ggplot(df_category, aes(x = Category, y = Frequency, fill = Category)) +
  geom_col() +
  theme(axis.text.x = element_blank()) +
  geom_text_repel(aes(label = Category))
bp

## Create a pie chart based on the incident category.

# Stack every category's share into a single bar, then map the y axis to the
# angle of a polar coordinate system so the stacked bar is drawn as a pie.
bp <- ggplot(df_category, aes(x = "", y = Percentage, fill = Category)) +
  geom_col()
pie <- bp + coord_polar(theta = "y")
pie

# Temporal Trends

## Theft Over Time

# Keep only incidents whose category contains "Larceny/Theft".
df_theft <- df %>%
  filter(grepl("Larceny/Theft", Category))

# Tally those incidents per calendar day, oldest day first. The Date column is
# text and is parsed as month/day/year.
df_theft_daily <- df_theft %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%
  count(Date, name = "count") %>%
  arrange(Date)

Here, we set the month facets to chronological order instead of alphabetical order.

# Turn the plotting columns into factors so that axes and facets follow an
# explicit order rather than alphabetical order.
# NOTE(review): df_arrest_time_month, dow_format and hour_format are not
# defined in the visible part of this notebook; they must exist before this
# chunk runs.
# Days use the reverse of dow_format as their level order.
df_arrest_time_month$DayOfWeek <- factor(df_arrest_time_month$DayOfWeek, levels = rev(dow_format))
# Hours 0-23 are relabelled with hour_format.
df_arrest_time_month$Hour <- factor(df_arrest_time_month$Hour, levels = 0:23, labels = hour_format)
# month.name is base R's built-in vector "January", ..., "December".
df_arrest_time_month$Month <- factor(df_arrest_time_month$Month, levels = month.name)

Plot of Factor by Month

# Heatmap of `norm` by hour (x) and day of week (y), with one panel per month
# laid out in four rows.
plot <- ggplot(
  data = df_arrest_time_month,
  mapping = aes(x = Hour, y = DayOfWeek, fill = norm)
) +
  geom_tile() +
  facet_wrap(~ Month, nrow = 4) +
  scale_fill_gradient(low = "#9bfdff", high = "#4401ff") +
  # Small, rotated hour labels so all of them fit under each panel.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6, size = 4)) +
  xlab("Hour of Arrest (Local Time)") +
  ylab("Day of Week of Arrest") +
  ggtitle("Police Arrests in Chennai from 2009 to 2018 by Time of Arrest, Normalized by Month")
plot

## Factor By Year

What if things changed over time?

# Count df_arrest rows per (Year, DayOfWeek, Hour), then express each count as
# a share of its year's total (`norm`).
# NOTE(review): df_arrest, get_hour, dow_format and hour_format are not defined
# in the visible part of this notebook; they must exist before this chunk runs.
df_arrest_time_year <- df_arrest %>%
  mutate(
    # The Date column is parsed as month/day/year; Year is kept as text.
    Year = format(as.Date(Date, "%m/%d/%Y"), "%Y"),
    # NOTE(review): sapply()'s result type depends on what get_hour() returns;
    # consider vapply() once that return type is confirmed.
    Hour = sapply(Time, get_hour)
  ) %>%
  group_by(Year, DayOfWeek, Hour) %>%
  # Drop the grouping explicitly instead of relying on summarize()'s default.
  summarize(count = n(), .groups = "drop") %>%
  group_by(Year) %>%
  mutate(norm = count / sum(count)) %>%
  # Return an ungrouped table so later steps are not silently done per year.
  ungroup()
# Days use the reverse of dow_format as their level order; hours 0-23 are
# relabelled with hour_format.
df_arrest_time_year$DayOfWeek <- factor(df_arrest_time_year$DayOfWeek, levels = rev(dow_format))
df_arrest_time_year$Hour <- factor(df_arrest_time_year$Hour, levels = 0:23, labels = hour_format)

Police Arrests Normalized by Year

# Heatmap of `norm` by hour (x) and day of week (y), with one panel per year
# laid out in six rows.
plot <- ggplot(
  data = df_arrest_time_year,
  mapping = aes(x = Hour, y = DayOfWeek, fill = norm)
) +
  geom_tile() +
  # fte_theme() +
  facet_wrap(~ Year, nrow = 6) +
  scale_fill_gradient(low = "#01ff44", high = "#00340e") +
  # Small, rotated hour labels so all of them fit under each panel.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6, size = 4)) +
  xlab("Hour of Arrest (Local Time)") +
  ylab("Day of Week of Arrest") +
  ggtitle("Police Arrests in Chennai from 2014 to 2018 by Time of Arrest, Normalized by Year")
plot

## Works Cited